//before running the following commands, first import the processed publications and patents data produced by the two corresponding SQL scripts and save them in Stata format (.dta) with your preferred file names. 
use "[publication data file name].dta"

//generating log normalized values of historical and cohorot frequency to be used in Tableau for Figure 3
gen lnMedianRefPairCumFreqL4=ln(MedianRefPairCumFreqL4+1)
gen lnMedianRefPairFreqL1=ln(MedianRefPairFreqL1+1)

//generating summary statistics from publications data to be used in Tableau for Figure 4
//indicator for whether a paper is above the median in terms of historical frequency
gen MedianRefPairCumFreqL4_am8018=0 if PubYear>=1980 & PubYear<=2018 & MedianRefPairCumFreqL4!=.
replace MedianRefPairCumFreqL4_am8018=1 if MedianRefPairCumFreqL4>248 & PubYear>=1980 & PubYear<=2018
//indicator for whether a paper is above the median in terms of cohorot frequency
gen MedianRefPairFreqL1_am8018=0 if PubYear>=1980 & PubYear<=2018 & MedianRefPairFreqL1!=.
replace MedianRefPairFreqL1_am8018=1 if MedianRefPairFreqL1>54  & PubYear>=1980 & PubYear<=2018

//generating summary statistics from publications data to be used in Tableau for Figure 4. The values can either be directly copy-pasted to Tableau or first moved to an Excel file and then imported into Tableau. 
sum Top5pct_5yr nobelpaper if MedianRefPairFreqL1_am8018!=. & MedianRefPairCumFreqL4_am8018!=. 
sum Top5pct_5yr nobelpaper if MedianRefPairFreqL1_am8018==0 & MedianRefPairCumFreqL4_am8018==0
sum Top5pct_5yr nobelpaper if MedianRefPairFreqL1_am8018==1 & MedianRefPairCumFreqL4_am8018==0
sum Top5pct_5yr nobelpaper if MedianRefPairFreqL1_am8018==0 & MedianRefPairCumFreqL4_am8018==1
sum Top5pct_5yr nobelpaper if MedianRefPairFreqL1_am8018==1 & MedianRefPairCumFreqL4_am8018==1

//generating statistics to be used in Tableau for Figure 5. The values can either be directly copy-pasted to Tableau or first moved to an Excel file and then imported into Tableau. 
reghdfe Top5pct_5yr i.MedianRefPairCumFreqL4_am8018#i.MedianRefPairFreqL1_am8018 if (Biology==1 | Medicine==1), noabs
margins i.MedianRefPairCumFreqL4_am8018#i.MedianRefPairFreqL1_am8018
sum Top5pct_5yr if e(sample)
reghdfe Top5pct_5yr i.MedianRefPairCumFreqL4_am8018#i.MedianRefPairFreqL1_am8018  if (Biology==1 | Medicine==1) , abs(LastAuthorID)
margins i.MedianRefPairCumFreqL4_am8018#i.MedianRefPairFreqL1_am8018
sum Top5pct_5yr if e(sample)




use "[patent data file name].dta"

//generating log normalized values of historical and cohorot frequency to be used in Tableau for Figure S2
gen lnMedianRefPairCumFreqL4=ln(MedianRefPairCumFreqL4+1)
gen lnMedianRefPairFreqL1=ln(MedianRefPairFreqL1+1)

//generating summary statistics from patents data to be used in Tableau for Figure 4
//indicator for whether a patent is above the median in terms of historical frequency
gen MedianRefPairCumFreqL4_am8018=0 if appyear>=1980 & appyear<=2018
replace MedianRefPairCumFreqL4_am8018=1 if MedianRefPairCumFreqL4> 4276 & appyear>=1980 & appyear<=2018
//indicator for whether a patent is above the median in terms of cohorot frequency
gen MedianRefPairFreqL1_am8018=0 if appyear>=1980 & appyear<=2018
replace MedianRefPairFreqL1_am8018=1 if MedianRefPairFreqL1> 852 & appyear>=1980 & appyear<=2018

//generating summary statistics from patents data to be used in Tableau for Figure 4. The values can either be directly copy-pasted to Tableau or first moved to an Excel file and then imported into Tableau. 
sum Top5pct_5yr if MedianRefPairFreqL1_am8018!=. & MedianRefPairCumFreqL4_am8018!=. 
sum Top5pct_5yr if MedianRefPairFreqL1_am8018==0 & MedianRefPairCumFreqL4_am8018==0
sum Top5pct_5yr if MedianRefPairFreqL1_am8018==1 & MedianRefPairCumFreqL4_am8018==0
sum Top5pct_5yr if MedianRefPairFreqL1_am8018==0 & MedianRefPairCumFreqL4_am8018==1
sum Top5pct_5yr if MedianRefPairFreqL1_am8018==1 & MedianRefPairCumFreqL4_am8018==1

